#!/bin/bash
#
# Driver for bootcache tasks: takes filecache snapshots around a task,
# preloads the recorded files, and schedules or runs a "poor man's defrag".
#
# bash is requested explicitly: this script relies on bash extensions
# (the `function` keyword and ${var//pattern/replacement} among others),
# which a strict POSIX /bin/sh such as dash does not accept.

# Verbosity level; passed to filecache and used to gate extra messages.
VERBOSE=0
# Directory under which every task keeps its data.
CACHE_ROOT=/var/cache/bootcache
# . /etc/default/bootcache
# Helper library.  NOTE(review): presumably this is where
# wait_for_process_start / wait_for_process_stop come from, since they are
# called below but not defined in this file -- confirm.
. /etc/init.d/bootcache-timing.sh ''

# Command prefix for every filecache call; it is expanded unquoted on
# purpose so that it splits into the command and its options.
FILECACHE="filecache --verbose $VERBOSE"

# Task name: second command-line argument, "boot" when omitted.
TASK=${2:-boot}
# Per-task directories, all below $CACHE_ROOT/$TASK.
TASK_ROOT="$CACHE_ROOT/$TASK"
PRELOAD_ROOT="$TASK_ROOT/preload"
SNAPSHOT_ROOT="$TASK_ROOT/snapshot"
DEFRAG_ROOT="$TASK_ROOT/defrag"
# Name of the per-filesystem directory created under each mount point.
DEFRAG_HABITAT=".defrag-habitat-$TASK"

# Write the one-line usage summary to stdout.
print_help() {
    printf '%s\n' "Usage: $0 start|stop|preload|defrag|defrag-now|clear [task]"
}

# Age the snapshot/preload generations by one step and create a fresh,
# empty snapshot directory.
#
# Generations are named by suffix: "" (current), 1, 2, 3 (oldest).  The
# oldest pair is deleted, every other pair moves up by one.  A preload
# directory only travels together with the snapshot of the same
# generation; a snapshot that has no preload counterpart is rotated on its
# own, without an error from mv.
#
# Globals:  SNAPSHOT_ROOT, PRELOAD_ROOT (read)
# Returns:  status of the final mkdir
rotate_snapshots() {
    local s="$SNAPSHOT_ROOT"
    local p="$PRELOAD_ROOT"
    local gen next

    # drop the oldest generation
    [ -e "${s}3" ] && rm -fr "${s}3" "${p}3"

    # oldest first, so each destination name has already been vacated
    for gen in 2 1 ''; do
        next=$(( ${gen:-0} + 1 ))

        [ -e "${s}${gen}" ] || continue
        mv "${s}${gen}" "${s}${next}" || continue

        [ -e "${p}${gen}" ] || continue
        # a leftover destination would make mv nest the directory inside it
        rm -fr "${p}${next}"
        mv "${p}${gen}" "${p}${next}"
    done

    mkdir -p "${s}"
}

# Begin recording a task: rotate the older generations away, then dump a
# filecache snapshot as the "start" state.
#
# Globals:  FILECACHE, SNAPSHOT_ROOT (read)
# Returns:  status of the filecache call
do_start() {
    local start_dump="$SNAPSHOT_ROOT/start"

    rotate_snapshots

    $FILECACHE --snapshot --dump "$start_dump"
}

# Finish recording a task: dump the "stop" snapshot, diff it against
# "start" and write the result as the new preload data.
#
# Globals:  FILECACHE, SNAPSHOT_ROOT, PRELOAD_ROOT (read)
# Returns:  status of the filecache call
do_stop() {
    local snap="$SNAPSHOT_ROOT"

    # The boot task does not call "start", so an existing "stop" dump means
    # the rotation has to happen here.
    [ -d "$snap/stop" ] && rotate_snapshots

    # For the same reason there may be no "start" entry yet; create one.
    [ -d "$snap/start" ] || mkdir -p "$snap/start"

    # Remove the files currently in the preload directory.
    rm -f "$PRELOAD_ROOT"/*

    $FILECACHE --snapshot "state.include?(?R) or \
                           state.include?(?A) or \
                           state.include?(?M)" \
        --dump "$snap/stop" \
        --difference "$snap/start" \
        --prune-path '^(/var/|/tmp/)' \
        --complete 80 \
        --dump "$PRELOAD_ROOT"
}


# Switch every block queue to the deadline elevator with preload-friendly
# tunables, remembering what was active before.
#
# Arguments: $1 - directory holding the per-device "queue" directories
#                 (optional, default /sys/block)
# Globals:   QUEUES    (written) - space-separated queue directories that
#                                  were reconfigured
#            ELEVATORS (written) - the elevator that was active on each of
#                                  them, in the same order as QUEUES
# Exits the script with status 1 if the deadline elevator is not offered
# by a queue and cannot be loaded with modprobe.
setup_queue() {
    local sysfs_block="${1:-/sys/block}"
    local deadline=no
    local queue elevators oelv

    QUEUES=
    ELEVATORS=

    for queue in "$sysfs_block"/*/queue; do
        # Skips the literal pattern left by an unmatched glob as well as
        # queues without a readable scheduler file.
        [ -r "$queue/scheduler" ] || continue

        elevators=
        read -r elevators < "$queue/scheduler"

        # The active elevator is the bracketed entry.  It is extracted with
        # pattern matching instead of word splitting, because an unquoted
        # "[cfq]" is itself a glob pattern.
        oelv=
        case "$elevators" in
            *'['*']'*)
                oelv=${elevators#*\[}
                oelv=${oelv%%\]*}
                ;;
        esac

        # Is deadline offered at all (active or not)?
        case " $elevators " in
            *' deadline '* | *' [deadline] '*)
                deadline=yes
                ;;
        esac

        [ "$deadline" = 'yes' ] || modprobe deadline-iosched ||
            { echo "deadline elevator is required for preloading"; exit 1; }

        # Without a known previous elevator there is nothing to switch back
        # to; "deadline" makes reset_queue restore the tunables only.
        [ -n "$oelv" ] || oelv=deadline

        # Record the pair before touching the queue, so the two lists stay
        # aligned even if the writes below are interrupted.
        QUEUES="$QUEUES $queue"
        ELEVATORS="$ELEVATORS $oelv"

        echo deadline   >   "$queue/scheduler"
        echo 0          >   "$queue/iosched/read_expire"
        echo 8          >   "$queue/iosched/fifo_batch"
        echo 64         >   "$queue/iosched/seek_dist"
        echo 10240      >   "$queue/nr_requests"
    done
}

# Undo setup_queue: give each queue in $QUEUES its previous elevator back
# and restore the request-queue tunables.
#
# Arguments: the previous elevators, one per queue and in the same order as
#            $QUEUES.  With no arguments the list recorded in $ELEVATORS is
#            used, so a bare "reset_queue" (e.g. from a trap) still
#            restores the queues.
# Globals:   QUEUES, ELEVATORS (read)
# Stops at the first queue for which no elevator is left.
# NOTE(review): fifo_batch and seek_dist, which setup_queue also changes,
# are not restored here -- confirm whether that is intended.
reset_queue() {
    local queue elv

    [ $# -gt 0 ] || set -- $ELEVATORS

    for queue in $QUEUES; do
        [ -n "${1:-}" ] || return 0
        elv=$1
        shift

        if [ "$elv" != 'deadline' ]; then
            echo "$elv" > "$queue/scheduler"
        else
            # deadline stays active; only put its read_expire back
            echo 500  > "$queue/iosched/read_expire"
        fi
        echo 128  > "$queue/nr_requests"
    done
}

# Start background readahead for the task's preload data.
#
# The "bdev" list is read ahead immediately.  Every other regular file in
# $PRELOAD_ROOT gets its own background job, which waits (up to ten
# checks with growing sleeps) until the file's name shows up in the mount
# tables, and is skipped if it never does.
#
# Globals:  PRELOAD_ROOT, TASK, RUNLEVEL (read)
# Returns:  1 if there is no preload directory for the task
do_preload() {
    local file dev

    [ -d "$PRELOAD_ROOT" ] || { echo "no preload data for task '$TASK'"; return 1; }

    # run this script at the lowest CPU and I/O priority
    [ -x /usr/bin/renice ] && /usr/bin/renice 19 $$ > /dev/null
    [ -x /usr/bin/ionice ] && /usr/bin/ionice -c3 -p$$

    wait_for_process_start udevd
    sleep 1

    readahead-fs -t 5 "$PRELOAD_ROOT/bdev" &

    for file in "$PRELOAD_ROOT"/*; do
        [ -f "$file" ] || continue
        dev=${file##*/}
        # bdev has been started above
        [ "$dev" = 'bdev' ] && continue

        (
            # NOTE(review): ${dev//\//-} turns every "/" into "-"; a
            # directory entry cannot contain "/", so this looks like a
            # no-op -- confirm the intended name mapping.
            pattern=${dev//\//-}
            ready=no
            for i in 1 2 3 4 5 6 7 8 9 10; do
                if [ -z "$RUNLEVEL" ] ||                        # early boot time?
                   grep -q -- "$pattern" /etc/mtab ||           # device mounted?
                   grep -q -- "$pattern" /proc/mounts           # yeah, both is needed
                then
                    ready=yes
                    break
                fi
                sleep "$i"
            done

            # An explicit flag, so that success on the very last check is
            # not mistaken for a timeout.
            [ "$ready" = 'yes' ] && readahead-fs -t 300 "$file"
        ) &
    done
}

# Derive the defrag file list for $TASK from its preload data and dump it
# to $DEFRAG_ROOT.
#
# Globals:  FILECACHE, PRELOAD_ROOT, DEFRAG_ROOT, TASK (read)
# Returns:  1 if the task has no preload data, otherwise the status of the
#           filecache call
schedule_defrag() {
    [ -d "$PRELOAD_ROOT" ] || {
        echo "sorry, no preload data for task '$TASK'"
        return 1
    }

    $FILECACHE \
        --load "$PRELOAD_ROOT" \
        --delete-if 'f.size > 300 or f.size > (4 + 1.2 * f.cached)' \
        --prune-path '/font' \
        --dump "$DEFRAG_ROOT"
}

# Create or remove the defrag habitat directory ($DEFRAG_HABITAT) at the
# top of every filesystem that is mounted from a block device.
#
# Arguments: $1 - "rmdir" removes existing habitat directories; any other
#                 value (callers pass "mkdir") creates missing ones
#            $2 - mount table to read (optional, default /etc/mtab)
# Globals:   DEFRAG_HABITAT (read)
# NOTE(review): mount points are used exactly as they appear in the table;
# escaped characters in them (spaces are presumably written as \040) are
# not decoded -- confirm whether such mount points need support.
do_habitat_dir() {
    local action="${1:-}"
    local mtab="${2:-/etc/mtab}"
    local dev mp fstype options defrag_dir

    while read -r dev mp fstype options
    do
        defrag_dir="$mp/$DEFRAG_HABITAT"

        [ -b "$dev" ] || continue

        # delete task or prepare for migration to a better group?
        if [ "$action" = 'rmdir' ]; then
            [ -d "$defrag_dir" ] && rmdir "$defrag_dir"
            continue
        fi

        # defrag habitat dirs should be
        # consistent/persistent - created once, reused afterwards
        [ -d "$defrag_dir" ] && continue

        # The oldalloc option for ext2/3 filesystems instructs them to
        # create top level dirs in the emptiest group. One group is
        # normally 100M in size, so we should limit the total size of moved
        # files.  One observation is that the orlov allocator makes
        # unbalanced use of groups (check it out for yourself using
        # `dumpe2fs /dev/hda2 | grep free`), which is exactly what we
        # want ;-)
        #
        # Moving inodes to the habitat dir helps make continuous data
        # blocks and inode blocks.  The only things remaining disordered
        # are the directory file blocks.  A simple experiment shows that
        # one set of 1578 cached system files only involves 259 unique
        # paths, so the remaining fragmentation is expected to be trivial.
        case "$fstype" in
            ext2 | ext3) mount -o oldalloc,remount "$mp" ;;
        esac

        mkdir "$defrag_dir"

        # switch back to the orlov allocator
        case "$fstype" in
            ext2 | ext3) mount -o orlov,remount "$mp" ;;
        esac
    done < "$mtab"
}

# Run the "poor man's defrag" for $TASK right now.
#
# Globals:  TASK, VERBOSE, FILECACHE, DEFRAG_ROOT, DEFRAG_HABITAT,
#           PRELOAD_ROOT (read)
# Returns:  the status of schedule_defrag if the defrag list had to be
#           created and that failed; the status of the filecache defrag
#           run if it failed; otherwise the status of the final filecache
#           snapshot run.
do_defrag() {
    local retval

    # prepare the defrag habitat dirs for each filesystem
    do_habitat_dir mkdir

    # ok, someone insists on doing the defrag...
    if [ ! -d "$DEFRAG_ROOT" ]; then
        [ "$VERBOSE" -gt 0 ] && echo "preparing defrag task $TASK for immediate run ..."
        schedule_defrag || return
    fi

    # do poor man's defrag
    echo "running poor man's defrag for task '$TASK' ..."
    $FILECACHE --load "$DEFRAG_ROOT" \
                --defrag "$DEFRAG_HABITAT"
    retval=$?

    # clean up the defrag root even on failure, or bootcache.sh will call
    # us again and again!
    rm -fr "$DEFRAG_ROOT.old"
    mv "$DEFRAG_ROOT" "$DEFRAG_ROOT.old"
    [ "$retval" -eq 0 ] || { echo "failed to defrag '$TASK'"; return "$retval"; }

    # update the moved fs metadata
    # it will include old metadata from the deleted inodes
    # so running an update on the next boot is recommended
    $FILECACHE --snapshot 'state.include?(?D)' \
                --delete-if 'not f.blockdev?' \
                --union "$PRELOAD_ROOT/bdev" \
                --dump "$PRELOAD_ROOT"
}

# Forget everything recorded for the task: delete its data directory, then
# remove its habitat directories.
#
# Globals:  TASK_ROOT (read)
# Returns:  status of do_habitat_dir
do_clear() {
    local doomed="$TASK_ROOT"

    rm -rf "$doomed"
    do_habitat_dir rmdir
}

# Dispatch the command given as the first argument; an unknown or missing
# command prints the usage line.
#
# Arguments: $1 - start|stop|preload|defrag|defrag-now|create-habitat|
#                 remove-habitat|clear
main() {
    local rc

    case "$1" in
        start)
            do_start
            ;;
        stop)
            do_stop
            ;;
        preload)
            # Single quotes on purpose: $ELEVATORS is filled in by
            # setup_queue and must be expanded when the trap fires.  A bare
            # "reset_queue" has nothing to restore the queues with.  The
            # handler disarms EXIT first so that its own "exit" does not
            # run it a second time.
            trap 'trap - EXIT; reset_queue $ELEVATORS; exit' EXIT HUP INT ABRT QUIT SEGV TERM
            setup_queue
            do_preload
            wait_for_process_stop readahead-fs; sleep 10
            reset_queue $ELEVATORS
            rc=$?
            # the queues are restored; nothing is left for the trap to do
            trap - EXIT HUP INT ABRT QUIT SEGV TERM
            return "$rc"
            ;;
        defrag)
            if schedule_defrag && [ "$VERBOSE" -gt 0 ]; then
                echo "Defrag task $TASK scheduled, will run it on reboot/shutdown."
                echo "It is recommended to take a new snapshot on next boot,"
                echo "for the defrag will make our metadata info out of sync."
            fi
            ;;
        defrag-now)
            do_defrag
            ;;
        create-habitat)
            do_habitat_dir mkdir
            ;;
        remove-habitat)
            do_habitat_dir rmdir
            ;;
        clear)
            do_clear
            ;;
        *)
            print_help
            ;;
    esac
}

main "$@"

# vim: ts=4 sw=4 et
